Data Manipulation for Spatial Exante Analytics

Author

Maxwell Mkondiwa

1 Introduction

In this workbook, we show how the data manipulation steps for the LCAS data to do spatial exante analytics. The data manipulation steps include: (a) variable construction, (b) combine the LCAS with geovariables, e.g., soil grids, and (c) combine the LCAS to climate variables. We then show an interactive table that shows the merged data. We then use the data as inputs in subsequent spatial exante workflows.

We first clear the working, load all the packages and import the data from dataverse. The data is on CIMMYT CSISA dataverse: https://data.cimmyt.org/dataset.xhtml?persistentId=hdl:11529/10548507. To download the data, we use “agro’’ R package.

rm(list=ls())         # clear 

library(sp)
library(dplyr)
library(rio)
library(readxl)
library(tidyr)

## Loading required package: agro
if (!require(agro))  source("https://install-github.me/reagro/agro")

ff <- agro::get_data_from_uri("hdl:11529/10548507", ".")
ff
[1] "./hdl_11529_10548507/CSISA_IND_LDS_Whe_2018_Data.csv"
[2] "./hdl_11529_10548507/Districts_Covered.png"          
[3] "./hdl_11529_10548507/LDS_Info.csv"                   
[4] "./hdl_11529_10548507/LDS_Variables_Details.csv"      
[5] "./hdl_11529_10548507/LDS_Whe_2018_R_Markdown.html"   
[6] "./hdl_11529_10548507/LDS_Wheat_2018_Script.R"        
[7] "./hdl_11529_10548507/ODK_Prog_Choices_Sheet.csv"     
[8] "./hdl_11529_10548507/ODK_Prog_Survey_Sheet.csv"      
LDS <- read.csv("./hdl_11529_10548507/CSISA_IND_LDS_Whe_2018_Data.csv", stringsAsFactors=FALSE)

2 Variable construction

# Conversions

LDS$C.q306_cropLarestAreaHA=LDS$C.q306_cropLarestAreaAcre*0.405 #acre to ha
LDS$yield_kgperha=LDS$L.tonPerHectare*1000                      #t/ha to kg per ha
LDS$L.q607_farmGatePricePerKg=LDS$L.q607_farmGatePrice/100      # convert to price per kg

# Calculate N, P applied
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="10_26_26"]=0.10
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="12_32_16"]=0.12
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="14_35_14"]=0.14
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.20
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-20-0-13"]=0.20
LDS$F.q51071_gradeNPKN[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.20
LDS$F.q51071_gradeNPKN=as.numeric(LDS$F.q51071_gradeNPKN)

LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="10_26_26"]=0.26
LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="12_32_16"]=0.32
LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="14_35_14"]=0.35
LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.13
LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-20-0-13"]=0.20
LDS$F.q51071_gradeNPKP[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.20
LDS$F.q51071_gradeNPKP=as.numeric(LDS$F.q51071_gradeNPKP)

LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="10_26_26"]=0.26
LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="12_32_16"]=0.16
LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="14_35_14"]=0.14
LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="Other20-13-13"]=0.13
LDS$F.q51071_gradeNPKK[LDS$F.q51071_gradeNPK=="Other20-20-13"]=0.13
LDS$F.q51071_gradeNPKK=as.numeric(LDS$F.q51071_gradeNPKK)

# NPKS -----------
LDS$F.q51211_gradeNPKSN[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16
LDS$F.q51211_gradeNPKSN[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20
LDS$F.q51211_gradeNPKSN=as.numeric(LDS$F.q51211_gradeNPKSN)

LDS$F.q51211_gradeNPKSP[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16
LDS$F.q51211_gradeNPKSP[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20
LDS$F.q51211_gradeNPKSP=as.numeric(LDS$F.q51211_gradeNPKSP)

LDS$F.q51211_gradeNPKSK[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.16
LDS$F.q51211_gradeNPKSK[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.20
LDS$F.q51211_gradeNPKSK=as.numeric(LDS$F.q51211_gradeNPKSK)

LDS$F.q51211_gradeNPKSS[LDS$F.q51211_gradeNPKS=="16_20_0_13"]=0.13
LDS$F.q51211_gradeNPKSS[LDS$F.q51211_gradeNPKS=="20_20_0_13"]=0.13
LDS$F.q51211_gradeNPKSS=as.numeric(LDS$F.q51211_gradeNPKSS)

# Nutrient Content ----------------------
# Taken from Cedrez, Chamberlain, Guo and Hijmans, p3
### N -----------------------------------
LDS$F.totAmtDAPN=LDS$F.totAmtDAP*0.18 
LDS$F.totAmtUreaN=LDS$F.totAmtUrea*0.46
LDS$F.totAmtNPKN=LDS$F.totAmtNPK*LDS$F.q51071_gradeNPKN
LDS$F.totAmtTSPN=LDS$F.totAmtTSP*0
LDS$F.totAmtSSPN=LDS$F.totAmtSSP*0
LDS$F.totAmtNPKSN=LDS$F.totAmtNPKS*LDS$F.q51211_gradeNPKSN

LDS$N=rowSums(LDS[,c("F.totAmtDAPN","F.totAmtUreaN","F.totAmtNPKN","F.totAmtTSPN","F.totAmtSSPN","F.totAmtNPKSN")],na.rm = TRUE)
LDS$Nperha=LDS$N/LDS$C.q306_cropLarestAreaHA
LDS$NperhaSq=LDS$Nperha*LDS$Nperha

### P ------------------------------------
LDS$F.totAmtDAPP=LDS$F.totAmtDAP*0.46
LDS$F.totAmtUreaP=LDS$F.totAmtUrea*0
LDS$F.totAmtNPKP=LDS$F.totAmtNPK*LDS$F.q51071_gradeNPKP
LDS$F.totAmtTSPP=LDS$F.totAmtTSP*0.45
LDS$F.totAmtSSPP=LDS$F.totAmtSSP*0.2
LDS$F.totAmtNPKSP=LDS$F.totAmtNPKS*LDS$F.q51211_gradeNPKSP

LDS$P2O5=rowSums(LDS[,c("F.totAmtDAPP","F.totAmtUreaP","F.totAmtNPKP","F.totAmtTSPP","F.totAmtSSPP","F.totAmtNPKSP")],na.rm = TRUE)
LDS$P2O5perha=LDS$P2O5/LDS$C.q306_cropLarestAreaHA

# Creating dummy variables ------------------------
LDS$A.q111_fGenderdum[LDS$A.q111_fGender=="female"]=1
LDS$A.q111_fGenderdum[LDS$A.q111_fGender=="male"]=0

varieties=read.csv("LDS wheat variety maturity class.csv")
LDS=merge(LDS,varieties, by="D.q410_varName",all.x=TRUE)
LDS$variety_type_NMWV[LDS$variety_type=="NMWV"]=1
LDS$variety_type_NMWV[LDS$variety_type=="EMWV"]=0
LDS$variety_type_NMWV=as.numeric(LDS$variety_type_NMWV)

# Sowing time new --------------------------------------------------------------
LDS$Sowdate=LDS$D.seedingSowingTransplanting
library(tidyr)
LDS=LDS %>% separate(Sowdate, c("Sday","Smonth", "Syear"))
table(LDS$Sday)

 01  02  03  04  05  06  07  08  09  10  11  12  13  14  15  16  17  18  19  20 
 99 156 103 102 268 153 146 177 111 371  93 167 135 149 560 240 188 289 116 492 
 21  22  23  24  25  26  27  28  29  30  31 
222 454 415 332 557 165 147 369 375 481  16 
table(LDS$Smonth)

 Dec  Jan  Nov  Oct 
2301   60 5274   13 
table(LDS$Syear)

  17   18 
7588   60 
LDS$Smonth_issues=0

LDS$Smonth_issues[LDS$Smonth%in%c("11","12","14","15","17","18","20",
                          "22","23","24","25","26","27","29")]=1
LDS$Smonth[LDS$Smonth%in%c("11","12","14","15","17","18","20","22","23","24","25","26","27","29")]="Nov"

LDS$Sday[LDS$Smonth_issues%in%c(1)]=LDS$Smonth[LDS$Smonth_issues%in%c(1)]          
LDS$Syear[LDS$Syear==17]=2017
LDS$Syear[LDS$Syear==18]=2018
LDS$Syear[LDS$Syear==19]=2019

LDS$SowDate_Cleaned=paste(LDS$Sday,LDS$Smonth,LDS$Syear, sep="/")

library(anytime)
LDS$SowDate_Cleaned_Datefmt=anydate(LDS$SowDate_Cleaned)

library(lubridate)

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
LDS <- LDS %>%
  mutate(., Sowing_week = floor_date(SowDate_Cleaned_Datefmt, unit = "week"))
library(ggplot2)

SowingDates_2017_2019=ggplot(LDS, aes(x=factor(Sowing_week)))+
  geom_bar(stat="count", width=0.7, fill="steelblue")+
  theme_minimal()+
  labs(x="Sowing week")+
  coord_flip()
SowingDates_2017_2019

#ggsave("figures/SowingDates_2017_2019.png", dpi=300)

# Rabi season
LDS$Rabi2017_18=0
LDS$Rabi2017_18[LDS$SowDate_Cleaned_Datefmt< "2018-06-01"]=1
LDS$Sowing_Date_Early=0
LDS$Sowing_Date_Early[LDS$SowDate_Cleaned_Datefmt<"2017-11-21" & LDS$Rabi2017_18==1]=1
LDS$Sowing_Date_Early[LDS$SowDate_Cleaned_Datefmt<"2018-11-21" & LDS$Rabi2017_18==0]=1

# Harvesting time --------------------------------------------------------------
LDS$PrevCropHarvDate=LDS$D.pCHarv
LDS=LDS %>% separate(PrevCropHarvDate, c("Hday","Hmonth", "Hyear"))
LDS$Hyear[LDS$Hyear==17]=2017
LDS$Hyear[LDS$Hyear==18]=2018
LDS$Hyear[LDS$Hyear==19]=2019

LDS$Hmonthnum[LDS$Hmonth=="Jan"]=1
LDS$Hmonthnum[LDS$Hmonth=="Apr"]=4
LDS$Hmonthnum[LDS$Hmonth=="Jun"]=6
LDS$Hmonthnum[LDS$Hmonth=="Jul"]=7
LDS$Hmonthnum[LDS$Hmonth=="Aug"]=8
LDS$Hmonthnum[LDS$Hmonth=="Sep"]=9
LDS$Hmonthnum[LDS$Hmonth=="Oct"]=10
LDS$Hmonthnum[LDS$Hmonth=="Nov"]=11
LDS$Hmonthnum[LDS$Hmonth=="Dec"]=12

LDS$Hdaynum=as.numeric(LDS$Hday)
LDS$Hmonthnum=as.numeric(LDS$Hmonthnum)
LDS$Hyearnum=as.numeric(LDS$Hyear)

library(lubridate)
LDS <- LDS %>% 
  mutate(PrevCropHarvest_date_cleaned=make_date(year=Hyearnum,month=Hmonthnum,day=Hdaynum))

LDS$JanuaryFirst2017=ymd("2017-01-01")
LDS$JanuaryFirst2018=ymd("2018-01-01")
LDS$JanuaryFirst2019=ymd("2019-01-01")

LDS$PrevCropHarvestDayfor1stJan2017<- LDS$PrevCropHarvest_date_cleaned - LDS$JanuaryFirst2017
LDS$PrevCropHarvestDayfor1stJan2018<- LDS$PrevCropHarvest_date_cleaned - LDS$JanuaryFirst2018
LDS$PrevCropHarvestDayfor1stJan2019<- LDS$PrevCropHarvest_date_cleaned - LDS$JanuaryFirst2019

LDS$PrevCropHarvestDayfor1stJan2017_num=as.numeric(LDS$PrevCropHarvestDayfor1stJan2017)
LDS$PrevCropHarvestDayfor1stJan2018_num=as.numeric(LDS$PrevCropHarvestDayfor1stJan2018)
LDS$PrevCropHarvestDayfor1stJan2019_num=as.numeric(LDS$PrevCropHarvestDayfor1stJan2019)

LDS$PrevCropHarvestDayfor1stJan2017[LDS$PrevCropHarvestDayfor1stJan2017<0 | LDS$PrevCropHarvestDayfor1stJan2017>365]=0
LDS$PrevCropHarvestDayfor1stJan2018[LDS$PrevCropHarvestDayfor1stJan2018<0 | LDS$PrevCropHarvestDayfor1stJan2018>365]=0
LDS$PrevCropHarvestDayfor1stJan2019[LDS$PrevCropHarvestDayfor1stJan2019<0 | LDS$PrevCropHarvestDayfor1stJan2019>365]=0

LDS$PrevCropHarvestDay=LDS$PrevCropHarvestDayfor1stJan2017+LDS$PrevCropHarvestDayfor1stJan2018+LDS$PrevCropHarvestDayfor1stJan2019

LDS$PrevCropHarvestDay=as.numeric(LDS$PrevCropHarvestDay)

# Irrigation
LDS$G.q5301_irrigAvail[LDS$G.q5301_irrigAvail=="Yes"]="yes"
LDS$G.q5301_irrigAvail_dum[LDS$G.q5301_irrigAvail=="yes"]=1
LDS$G.q5301_irrigAvail_dum[LDS$G.q5301_irrigAvail=="no"]=0
LDS$G.q5305_irrigTimes_onevsall[LDS$G.q5305_irrigTimes==1]=1
LDS$G.q5305_irrigTimes_onevsall[LDS$G.q5305_irrigTimes>=2]=0
LDS$G.q5305_irrigTimes_twovs1[LDS$G.q5305_irrigTimes==2]=1
LDS$G.q5305_irrigTimes_twovs1[LDS$G.q5305_irrigTimes==1]=0
LDS$G.q5305_irrigTimes_threevs1[LDS$G.q5305_irrigTimes==3]=1
LDS$G.q5305_irrigTimes_threevs1[LDS$G.q5305_irrigTimes==1]=0
LDS$G.q5305_irrigTimes_fourplusvs1[LDS$G.q5305_irrigTimes>=4]=1
LDS$G.q5305_irrigTimes_fourplusvs1[LDS$G.q5305_irrigTimes==1]=0
# Less than 2 versus more irrigation
LDS$G.q5305_irrigTimes_Threeabove[LDS$G.q5305_irrigTimes>=3]=1
LDS$G.q5305_irrigTimes_Threeabove[LDS$G.q5305_irrigTimes<=2]=0

library(stringr)
library(dplyr)

LDS$IrrigSource=NA
LDS$IrrigSource[LDS$G.q5302_irrigSource%in%c("canal","Canal","Canal Other","Canal Pond","Canal Pond Lift","Canal Lift","Pond Dugwell Tank","Pond Lift","River", "River Canal"," River Canal Lift","River Canal Pond")]="Surface"

LDS$IrrigSource[LDS$G.q5302_irrigSource%in%c("Shallow Tubewell","shallowTubeWell","Shallow TubeWell","ShallowTubewell","ShallowTubeWell","ShallowTubeWell","ShallowTubeWell Dugwell","ShallowTubeWell Lift","ShallowTubeWell Other","ShallowTubeWell Tank")]="ShallowTubewell"

LDS$IrrigSource[LDS$G.q5302_irrigSource%in%c("Deep Tubewell","DeepTubewel","DeepTubewell","DeepTubeWell","DeepTubeWell Dugwell")]="DeepTubeWell"

LDS$IrrigSource[LDS$G.q5302_irrigSource%in%c("Canal Pond DeepTubeWell","","Pond DeepTubeWell","Pond ShallowTubeWell","River Canal DeepTubeWell","River Canal ShallowTubeWell")]="Conjuctive"

LDS$IrrigSource[LDS$G.q5301_irrigAvail%in%c("no")]="None"

# Energy source 
LDS$PumpEnergySource=LDS$H.q5406_pumpEnergy
LDS$PumpEnergySource[LDS$PumpEnergySource=="Dielsel"]="Diesel"
LDS$PumpEnergySource[LDS$PumpEnergySource==""]=NA

LDS$PumpEnergySource <- relevel(factor(LDS$PumpEnergySource), ref = "Diesel")
LDS$I.q5502_droughtSeverity<-relevel(factor(LDS$I.q5502_droughtSeverity), ref = "None")
LDS$IrrigSource<-relevel(factor(LDS$IrrigSource), ref = "None")

# Weed management ---------------------
LDS$Weedmanaged[LDS$J.manualWeedTimes!=0 | LDS$J.herbAppTimes!=0]=1
LDS$Weedmanaged[LDS$J.manualWeedTimes==0 & LDS$J.herbAppTimes==0]=0
LDS$Weedherb[LDS$J.herbAppTimes!=0]=1
LDS$Weedherb[LDS$J.herbAppTimes==0]=0
LDS$Weedmanual[LDS$J.manualWeedTimes!=0]=1
LDS$Weedmanual[LDS$J.manualWeedTimes==0]=0

LDS$variety_type_NMWV=as.numeric(LDS$variety_type_NMWV)

LDS$I.q5505_weedSeverity_num[LDS$I.q5505_weedSeverity=="None"]=1
LDS$I.q5505_weedSeverity_num[LDS$I.q5505_weedSeverity=="Low"]=2
LDS$I.q5505_weedSeverity_num[LDS$I.q5505_weedSeverity=="Medium"]=3
LDS$I.q5505_weedSeverity_num[LDS$I.q5505_weedSeverity=="High"]=4

LDS$I.q5506_insectSeverity_num[LDS$I.q5506_insectSeverity=="None"]=1
LDS$I.q5506_insectSeverity_num[LDS$I.q5506_insectSeverity=="Low"]=2
LDS$I.q5506_insectSeverity_num[LDS$I.q5506_insectSeverity=="Medium"]=3
LDS$I.q5506_insectSeverity_num[LDS$I.q5506_insectSeverity=="High"]=4

LDS$I.q5509_diseaseSeverity_num[LDS$I.q5509_diseaseSeverity=="None"]=1
LDS$I.q5509_diseaseSeverity_num[LDS$I.q5509_diseaseSeverity=="Low"]=2
LDS$I.q5509_diseaseSeverity_num[LDS$I.q5509_diseaseSeverity=="Medium"]=3
LDS$I.q5509_diseaseSeverity_num[LDS$I.q5509_diseaseSeverity=="High"]=4

LDS$I.q5504_floodSeverity_num[LDS$I.q5504_floodSeverity=="None"]=1
LDS$I.q5504_floodSeverity_num[LDS$I.q5504_floodSeverity=="Low"]=2
LDS$I.q5504_floodSeverity_num[LDS$I.q5504_floodSeverity=="Medium"]=3
LDS$I.q5504_floodSeverity_num[LDS$I.q5504_floodSeverity=="High"]=4

LDS$I.q5502_droughtSeverity_num[LDS$I.q5502_droughtSeverity=="None"]=1
LDS$I.q5502_droughtSeverity_num[LDS$I.q5502_droughtSeverity=="Low"]=2
LDS$I.q5502_droughtSeverity_num[LDS$I.q5502_droughtSeverity=="Medium"]=3
LDS$I.q5502_droughtSeverity_num[LDS$I.q5502_droughtSeverity=="High"]=4

LDS$D.prevCrop_Fallow[LDS$D.prevCrop=="Fallow"]=1
LDS$D.prevCrop_Fallow[LDS$D.prevCrop!="Fallow"]=0

LDS$D.prevCrop_Rice[LDS$D.prevCrop=="Rice"]=1
LDS$D.prevCrop_Rice[LDS$D.prevCrop!="Rice"]=0

LDS$Nperha_100belowvsabove[LDS$Nperha>=100]=1
LDS$Nperha_100belowvsabove[LDS$Nperha<100]=0

LDS$Nperha_100belowvs100_150[LDS$Nperha>=100 & LDS$Nperha<=150]=1
LDS$Nperha_100belowvs100_150[LDS$Nperha<100]=0

LDS$Nperha_100belowvs150_200[LDS$Nperha>=150 & LDS$Nperha<=200]=1
LDS$Nperha_100belowvs150_200[LDS$Nperha<100]=0

LDS$Nperha_100belowvs200_250[LDS$Nperha>=200 &LDS$Nperha<=250]=1
LDS$Nperha_100belowvs200_250[LDS$Nperha<100]=0

LDS$Nperha_100belowvs200plus[LDS$Nperha>=200]=1
LDS$Nperha_100belowvs200plus[LDS$Nperha<100]=0


# Education
LDS$A.q112_fEdu_new=LDS$A.q112_fEdu

LDS$A.q112_fEdu_new[LDS$A.q112_fEdu_new=="masters"]="Postgrad"
LDS$A.q112_fEdu_new[LDS$A.q112_fEdu_new=="phD"]="Postgrad"


## Creating the key explanatory variables

### Sowing --------------------------------------------------

LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt<="2017-11-10" & LDS$Rabi2017_18==1]="T1_10Nov"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt<="2018-11-21" & LDS$Rabi2017_18==0]="T1_10Nov"

LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2017-11-11"& LDS$SowDate_Cleaned_Datefmt<="2017-11-20" & LDS$Rabi2017_18==1]="T2_20Nov"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2018-11-11"& LDS$SowDate_Cleaned_Datefmt<="2018-11-20" & LDS$Rabi2017_18==0]="T2_20Nov"

LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2017-11-21"& LDS$SowDate_Cleaned_Datefmt<="2017-11-30" & LDS$Rabi2017_18==1]="T3_30Nov"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2018-11-21"& LDS$SowDate_Cleaned_Datefmt<="2018-11-30" & LDS$Rabi2017_18==0]="T3_30Nov"


LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2017-12-1"& LDS$SowDate_Cleaned_Datefmt<="2017-12-15" & LDS$Rabi2017_18==1]="T4_15Dec"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2018-12-1"& LDS$SowDate_Cleaned_Datefmt<="2018-12-15" & LDS$Rabi2017_18==0]="T4_15Dec"

LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2017-12-1"& LDS$SowDate_Cleaned_Datefmt<="2017-12-15" & LDS$Rabi2017_18==1]="T4_15Dec"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2018-12-1"& LDS$SowDate_Cleaned_Datefmt<="2018-12-15" & LDS$Rabi2017_18==0]="T4_15Dec"

LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2017-12-16" & LDS$Rabi2017_18==1]="T5_16Dec"
LDS$Sowing_Date_Schedule[LDS$SowDate_Cleaned_Datefmt>="2018-12-16" & LDS$Rabi2017_18==0]="T5_16Dec"

table(LDS$Sowing_Date_Schedule,LDS$Rabi2017_18)
          
              1
  T1_10Nov  416
  T2_20Nov 1704
  T3_30Nov 3167
  T4_15Dec 1696
  T5_16Dec  665
summary(LDS$Sowing_Date_Schedule)
   Length     Class      Mode 
     7648 character character 
LDS$Sowing_Date_Schedule_rating_num[LDS$Sowing_Date_Schedule=="T5_16Dec"]=1
LDS$Sowing_Date_Schedule_rating_num[LDS$Sowing_Date_Schedule=="T4_15Dec"]=2
LDS$Sowing_Date_Schedule_rating_num[LDS$Sowing_Date_Schedule=="T3_30Nov"]=3
LDS$Sowing_Date_Schedule_rating_num[LDS$Sowing_Date_Schedule=="T2_20Nov"]=4
LDS$Sowing_Date_Schedule_rating_num[LDS$Sowing_Date_Schedule=="T1_10Nov"]=5

LDS$Sowing_Date_Schedule=ordered(LDS$Sowing_Date_Schedule,levels=c("T5_16Dec","T4_15Dec","T3_30Nov","T2_20Nov","T1_10Nov"))


# Irrigation management -------------------------------
table(LDS$G.q5305_irrigTimes)

   1    2    3    4    5 
1026 3812 2373  396   14 
LDS$G.q5305_irrigTimes_cat[LDS$G.q5305_irrigTimes==1]="One"
LDS$G.q5305_irrigTimes_cat[LDS$G.q5305_irrigTimes == 2] <- "Two"
LDS$G.q5305_irrigTimes_cat[LDS$G.q5305_irrigTimes == 3] <- "Three"
LDS$G.q5305_irrigTimes_cat[LDS$G.q5305_irrigTimes >= 4] <- "Fourplus"
table(LDS$G.q5305_irrigTimes_cat)

Fourplus      One    Three      Two 
     410     1026     2373     3812 
LDS$G.q5305_irrigTimes_cat <- ordered(LDS$G.q5305_irrigTimes_cat, levels = c("One", "Two", "Three","Fourplus"))

3 Geovariables

The survey data contains approximate GPS locations of the plots. We can use these to extract soil and climate variables that are then included in crop response function.

# Function to add Geo-variables 

library(sf)
Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(sp)
library(rgdal)
Please note that rgdal will be retired by the end of 2023,
plan transition to sf/stars/terra functions using GDAL and PROJ
at your earliest convenience.

rgdal: version: 1.5-29, (SVN revision 1165M)
Geospatial Data Abstraction Library extensions to R successfully loaded
Loaded GDAL runtime: GDAL 3.2.1, released 2020/12/29
Path to GDAL shared files: C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/gdal
GDAL binary built with GEOS: TRUE 
Loaded PROJ runtime: Rel. 7.2.1, January 1st, 2021, [PJ_VERSION: 721]
Path to PROJ shared files: C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/proj
PROJ CDN enabled: FALSE
Linking to sp version:1.4-6
To mute warnings of possible GDAL/OSR exportToProj4() degradation,
use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
Overwritten PROJ_LIB was C:/Users/MMKONDIWA/OneDrive - CIMMYT/Documents/R/win-library/4.1/rgdal/proj
library(terra)
terra 1.5.21

Attaching package: 'terra'
The following object is masked from 'package:rgdal':

    project
The following object is masked from 'package:ggplot2':

    arrow
The following object is masked from 'package:tidyr':

    extract
The following object is masked from 'package:dplyr':

    src
library(raster)

Attaching package: 'raster'
The following object is masked from 'package:dplyr':

    select
library(geodata)

# add_secondary_lcas <- function (df) {
#   # Remove duplicates and NAs in geo-coordinates
#   #df=subset(df,!(duplicated(df$O.largestPlotGPS.Longitude)))
#   #df=subset(df,!(duplicated(df$O.largestPlotGPS.Latitude)))
#   df=subset(df,!(is.na(df$O.largestPlotGPS.Longitude)))
#   df=subset(df,!(is.na(df$O.largestPlotGPS.Latitude)))
#   df_sp= SpatialPointsDataFrame(cbind(df$O.largestPlotGPS.Longitude,df$O.largestPlotGPS.Latitude),data=df,proj4string=CRS("+proj=longlat +datum=WGS84"))
#   df_sf=st_as_sf(df_sp)
# 
#   population=population(2020,05,path=tempdir())
#   population_geodata=terra::extract(population,vect(df_sf),fun=mean,df=TRUE)
#   elevationglobal_geodata=elevation_global(0.5,path=tempdir())
#   elevation_geodata=terra::extract(elevationglobal_geodata,vect(df_sf),fun=mean,df=TRUE)
#   Soilsand=soil_world("sand",depth=5,path=tempdir())
#   Soilsand_lds=terra::extract(Soilsand,vect(df_sf),fun=mean,df=TRUE)
#   Totalnitrogen=soil_world("nitrogen",depth=5,path=tempdir())
#   Totalnitrogen_lds=terra::extract(Totalnitrogen,vect(df_sf),fun=mean,df=TRUE)
#   soilsoc=soil_world("soc",depth=15,path=tempdir())
#   soilsoc_lds=terra::extract(soilsoc,vect(df_sf),fun=mean,df=TRUE)
# 
#   # Merge all soils and population
#   geodata_df <- list(population_geodata,elevation_geodata,Soilsand_lds,Totalnitrogen_lds,soilsoc_lds)
#   geodata_df=Reduce(function(x, y) merge(x, y, all=TRUE),geodata_df)
#   #geodata_df=return(data.frame(geodata_df))
#   write.csv(geodata_df,paste0("geovariables",".csv"))
#   }
# add_secondary_lcas(LDS)
library(rio)
geovariables=import("geovariables.csv")
LDS=cbind(LDS,geovariables)

4 Climate variables

The geodata R package has aggregated rainfall and temperature variables. However, we need climate variables specific to the corresponding growing season.

library(ncdf4)
library(raster)
library(terra)
library(sf)
library(data.table)

Attaching package: 'data.table'
The following object is masked from 'package:raster':

    shift
The following object is masked from 'package:terra':

    shift
The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year
The following objects are masked from 'package:dplyr':

    between, first, last
library(exactextractr)


#RUN ONCE
#  add_temp_precip_lcas <- function (df) {
#    # Remove duplicates and NAs in geo-coordinates
#    #df=subset(df,!(duplicated(df$O.largestPlotGPS.Longitude)))
#    #df=subset(df,!(duplicated(df$O.largestPlotGPS.Latitude)))
#    df=subset(df,!(is.na(df$O.largestPlotGPS.Longitude)))
#    df=subset(df,!(is.na(df$O.largestPlotGPS.Latitude)))
#    df_sp= SpatialPointsDataFrame(cbind(df$O.largestPlotGPS.Longitude,df$OlargestPlotGPS.Latitude),data=df,proj4string=CRS("+proj=longlat +datum=WGS84"))
#    
#    df_sf=st_as_sf(df_sp)
#    version = "501"
#    start.yr = 1960
#    num.yrs = ifelse(version=="501", (2017-start.yr+1), (2010-start.yr+1))
#    udel.temp.filename = paste0("air.mon.mean.v",version,".nc")
#    udel.precip.filename = paste0("precip.mon.total.v",version,".nc")
#    # Output location to write results to
#    out.filename = paste0("UDel.aggregated.public.v",version,".csv")
#    out.filename2017 = paste0("UDel.aggregated2017.public.v",version,".csv")
#    yr.offset = start.yr-1900
#    temps = subset(brick(udel.temp.filename), (yr.offset*12+1):(12*(yr.offset+num.yrs)))
#    precip = subset(brick(udel.precip.filename), (yr.offset*12+1):(12*(yr.offset+num.yrs)))
#    # 1. Aggregate across months within a year:  mean for temp, sum for precip
#    annual.temps = stackApply(temps, indices = rep(1:num.yrs, each=12), fun=mean)
#    annual.precip = stackApply(precip, indices = rep(1:num.yrs, each=12), fun=sum)
#    # 2. Aggregate spatially.
#    annual.temps = rotate(annual.temps)
#    annual.precip = rotate(annual.precip)
# 
#    df_sf$idmatching=1:nrow(df_sf)
# 
#    # Aggregate temperatures
#    ctry.temps = rbindlist(lapply(1:num.yrs, FUN=function(yr) {
#    ctry.temps = extract(annual.temps[[yr]], df_sf)
#    # Create data.table of results for this year, including the year
#    return(data.table(hhid=df_sf$idmatching, temp=ctry.temps, yr=yr-1+start.yr))
#  }))
# 
#    #Aggregate precipitation
#    # Note here we're going to multiply precip data by 10.
#    # The UDel data is in cm/year, but Burke et al use mm/year.
#    ctry.precip = rbindlist(lapply(1:num.yrs, FUN=function(yr) {
#    cropped.precip = annual.precip[[yr]]*10
#    ctry.precip = extract(cropped.precip, df_sf)
#    # Create data.table of results for this year, including the year
#    return(data.table(hhid=df_sf$idmatching, precip=ctry.precip, yr=yr-1+start.yr))
#  }))
# 
#  # Combine these results and save
#    all.udel.data = merge(ctry.temps, ctry.precip, by=c("hhid", "yr"))
#    all.udel.data_2017=subset(all.udel.data,all.udel.data$yr=="2017")
#    fwrite(all.udel.data, out.filename)
#    fwrite(all.udel.data_2017, out.filename2017)
#  }
# 
# add_temp_precip_lcas(LDS)

## Temperature and Rainfall -------------------
tempprecip=read.csv("UDel.aggregated2017.public.v501.csv")
tempprecipall=read.csv("UDel.aggregated.public.v501.csv")

tempprecipallwide=reshape(tempprecipall, direction = "wide", idvar = "hhid", timevar = "yr")

tempprecipallwide_small=subset(tempprecipallwide, select=c("precip.2007","temp.2008","precip.2008",
"temp.2009","precip.2009","temp.2010","precip.2010","temp.2011","precip.2011","temp.2012","precip.2012",
"temp.2013","precip.2013","temp.2014","precip.2014","temp.2015","precip.2015","temp.2016","precip.2016","temp.2017","precip.2017"))

LDS=cbind(LDS,tempprecip,tempprecipallwide_small)

# Interactive table of the data
library(reactable)
reactable(LDS)
write.csv(LDS,"LDS_wheat_public_cleaned.csv")
save.image("LDS_Public_Workspace.RData")